Enhance WebsiteAgent#validate_extract_options!

It now validates the details of each extraction according to the
extraction type.

Akinori MUSHA 9 anni fa
parent
commit
dd5d213508
1 file modificato con 80 aggiunte e 7 eliminazioni
  1. 80 7
      app/models/agents/website_agent.rb

+ 80 - 7
app/models/agents/website_agent.rb

@@ -137,9 +137,7 @@ module Agents
137 137
       # Check for required fields
138 138
       errors.add(:base, "either url or url_from_event is required") unless options['url'].present? || options['url_from_event'].present?
139 139
       errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
140
-      if !options['extract'].present? && extraction_type != "json"
141
-        errors.add(:base, "extract is required for all types except json")
142
-      end
140
+      validate_extract_options!
143 141
 
144 142
       # Check for optional fields
145 143
       if options['mode'].present?
@@ -168,14 +166,89 @@ module Agents
168 166
       end
169 167
 
170 168
       validate_web_request_options!
171
-      validate_extract_options!
172 169
     end
173 170
 
174 171
     def validate_extract_options!
175
-      if extraction_type == "json" && interpolated['extract'].is_a?(Hash)
176
-        unless interpolated['extract'].all? { |name, details| details.is_a?(Hash) && details['path'].present? }
177
-          errors.add(:base, 'When type is json, all extractions must have a path attribute.')
172
+      case extract = interpolated['extract']
173
+      when Hash
174
+        if extract.each_value.any? { |value| !value.is_a?(Hash) }
175
+          errors.add(:base, 'extract must be a hash of hashes.')
176
+        else
177
+          case extraction_type
178
+          when 'html', 'xml'
179
+            extract.each do |name, details|
180
+              case details['css']
181
+              when String
182
+                # ok
183
+              when nil
184
+                case details['xpath']
185
+                when String
186
+                  # ok
187
+                when nil
188
+                  errors.add(:base, "When type is html or xml, all extractions must have a css or xpath attribute (bad extraction details for #{name.inspect})")
189
+                else
190
+                  errors.add(:base, "Wrong type of \"xpath\" value in extraction details for #{name.inspect}")
191
+                end
192
+              else
193
+                errors.add(:base, "Wrong type of \"css\" value in extraction details for #{name.inspect}")
194
+              end
195
+
196
+              case details['value']
197
+              when String, nil
198
+                # ok
199
+              else
200
+                errors.add(:base, "Wrong type of \"value\" value in extraction details for #{name.inspect}")
201
+              end
202
+            end
203
+          when 'json'
204
+            extract.each do |name, details|
205
+              case details['path']
206
+              when String
207
+                # ok
208
+              when nil
209
+                errors.add(:base, "When type is json, all extractions must have a path attribute (bad extraction details for #{name.inspect})")
210
+              else
211
+                errors.add(:base, "Wrong type of \"path\" value in extraction details for #{name.inspect}")
212
+              end
213
+            end
214
+          when 'text'
215
+            extract.each do |name, details|
216
+              case regexp = details['regexp']
217
+              when String
218
+                begin
219
+                  re = Regexp.new(regexp)
220
+                rescue => e
221
+                  errors.add(:base, "invalid regexp for #{name.inspect}: #{e.message}")
222
+                end
223
+              when nil
224
+                errors.add(:base, "When type is text, all extractions must have a regexp attribute (bad extraction details for #{name.inspect})")
225
+              else
226
+                errors.add(:base, "Wrong type of \"regexp\" value in extraction details for #{name.inspect}")
227
+              end
228
+
229
+              case index = details['index']
230
+              when Integer
231
+                # ok
232
+              when String
233
+                if re && !re.names.include?(index)
234
+                  errors.add(:base, "no named capture #{index.inspect} found in regexp for #{name.inspect})")
235
+                end
236
+              when nil
237
+                errors.add(:base, "When type is text, all extractions must have an index attribute (bad extraction details for #{name.inspect})")
238
+              else
239
+                errors.add(:base, "Wrong type of \"index\" value in extraction details for #{name.inspect}")
240
+              end
241
+            end
242
+          else
243
+            errors.add(:base, "Unknown extraction type #{extraction_type.inspect}")
244
+          end
245
+        end
246
+      when nil
247
+        unless extraction_type == 'json'
248
+          errors.add(:base, 'extract is required for all types except json')
178 249
         end
250
+      else
251
+        errors.add(:base, 'extract must be a hash')
179 252
       end
180 253
     end
181 254